{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 获取Web数据\n",
    "\n",
    "通过requests和beautifulsoap(bs4)库来访问web pages，并进行数据抓取和分析。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import pprint as pp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 基本请求。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "r = requests.get('http://httpbin.org/get')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "200"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 返回的状态码。200-OK,404-NOT FOUND.\n",
    "r.status_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<RequestsCookieJar[]>"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 本地保存的标识信息，服务器用于收集信息和识别客户端。\n",
    "r.cookies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Connection': 'keep-alive', 'Server': 'gunicorn/19.8.1', 'Date': 'Wed, 25 Jul 2018 04:29:27 GMT', 'Content-Type': 'application/json', 'Content-Length': '208', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true', 'Via': '1.1 vegur'}"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# http发送的头部信息，每一个请求都会包含。\n",
    "r.headers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'keep-alive'"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r.headers.get(\"Connection\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b'{\"args\":{},\"headers\":{\"Accept\":\"*/*\",\"Accept-Encoding\":\"gzip, deflate\",\"Connection\":\"close\",\"Host\":\"httpbin.org\",\"User-Agent\":\"python-requests/2.19.1\"},\"origin\":\"35.193.18.41\",\"url\":\"http://httpbin.org/get\"}\\n'"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# http请求中的content部分。\n",
    "r.content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"args\":{},\"headers\":{\"Accept\":\"*/*\",\"Accept-Encoding\":\"gzip, deflate\",\"Connection\":\"close\",\"Host\":\"httpbin.org\",\"User-Agent\":\"python-requests/2.19.1\"},\"origin\":\"35.193.18.41\",\"url\":\"http://httpbin.org/get\"}\\n'"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# http请求返回值的纯数据部分。\n",
    "r.text"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 获取另外一个地址。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = requests.get('http://google.com')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<RequestsCookieJar[Cookie(version=0, name='1P_JAR', value='2018-07-25-04', port=None, port_specified=False, domain='.google.com', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=False, expires=1535083891, discard=False, comment=None, comment_url=None, rest={}, rfc2109=False), Cookie(version=0, name='NID', value='135=qfi76jat1X5Fqbo4LVJvONvC9ik5-24E_I-940-gWR_EVaHXbZN3rzGTtLzFEkXfiz1WvZUV7OLxeQttbjODpuL3jroDzsocmdzzKpB-vf3SYmzP2clp17rchR_ia-Zf', port=None, port_specified=False, domain='.google.com', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=False, expires=1548303091, discard=False, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)]>"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g.cookies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('<!doctype html><html itemscope=\"\" itemtype=\"http://schema.org/WebPage\" '\n",
      " 'lang=\"en\"><head><meta content=\"Search the world\\'s information, including '\n",
      " 'webpages, images, videos and more. Google has many special features to help '\n",
      " 'you find exactly what you\\'re looking for.\" name=\"description\"><meta '\n",
      " 'content=\"noodp\" name=\"robots\"><meta content=\"text/html; charset=UTF-8\" '\n",
      " 'http-equiv=\"Content-Type\"><meta '\n",
      " 'content=\"/images/branding/googleg/1x/googleg_standard_color_128dp.png\" '\n",
      " 'itemprop=\"image\"><title>Google</title><script '\n",
      " 'nonce=\"QA9aPd6GP21fPJsbP9VV6w==\">(function(){window.google={kEI:\\'c_hXW7z_BsfdjwSHop-QAw\\',kEXPI:\\'0,201847,1151900,57,1654,246,58,1016,40,242,709,1004,636,773,156,139,446,2339139,208,219,32,329294,1294,12383,2349,2506,32691,15248,867,316,10445,1402,6381,3335,2,2,6801,364,553,664,326,1776,113,1149,1052,3191,224,502,5,349,1362,130,130,1028,241,2473,1365,444,131,1119,2,1306,310,296,1825,59,2,1,3,1297,1712,1376,505,730,377,1719,608,50,636,8,302,1267,774,193,1039,1015,284,2,841,283,1701,437,1199,23,502,22,437,162,5,2,2,657,1641,121,2,70,3,774,44,513,301,614,283,509,1587,214,82,1179,165,38,31,331,468,11,43,18,7,4,168,334,17,39,67,7,1110,78,51,105,386,8,584,1,313,19,2,84,81,7,1,2,25,463,243,198,179,29,708,186,4,4,4,6,71,6,406,109,342,7,63,81,135,14,338,1247,497,79,162,536,66,112,748,135,270,46,321,304,1,22,420,375,374,14,451,145,223,16,2322884,3685998,2554,5997648,2800138,4,1572,549,332,445,1,2,1,1,77,1,1,900,578,14,304,1,8,1,2,1,1,1953,10,11,156,1,1,1,1,1,47,16,41,2,1,2,1,34,5,7,19,14,2,2,22309608\\',authuser:0,kscs:\\'c9c918f0_c_hXW7z_BsfdjwSHop-QAw\\',kGL:\\'US\\'};google.kHL=\\'en\\';})();google.time=function(){return(new '\n",
      " 'Date).getTime()};(function(){google.lc=[];google.li=0;google.getEI=function(a){for(var '\n",
      " 'b;a&&(!a.getAttribute||!(b=a.getAttribute(\"eid\")));)a=a.parentNode;return '\n",
      " 'b||google.kEI};google.getLEI=function(a){for(var '\n",
      " 'b=null;a&&(!a.getAttribute||!(b=a.getAttribute(\"leid\")));)a=a.parentNode;return '\n",
      " 'b};google.https=function(){return\"https:\"==window.location.protocol};google.ml=function(){return '\n",
      " 'null};google.wl=function(a,b){try{google.ml(Error(a),!1,b)}catch(d){}};google.log=function(a,b,d,c,g){if(a=google.logUrl(a,b,d,c,g)){b=new '\n",
      " 'Image;var '\n",
      " 'e=google.lc,f=google.li;e[f]=b;b.onerror=b.onload=b.onabort=function(){delete '\n",
      " 'e[f]};google.vel&&google.vel.lu&&google.vel.lu(a);b.src=a;google.li=f+1}};google.logUrl=function(a,b,d,c,g){var '\n",
      " 'e=\"\",f=google.ls||\"\";d||-1!=b.search(\"&ei=\")||(e=\"&ei=\"+google.getEI(c),-1==b.search(\"&lei=\")&&(c=google.getLEI(c))&&(e+=\"&lei=\"+c));c=\"\";!d&&google.cshid&&-1==b.search(\"&cshid=\")&&\"slh\"!=a&&(c=\"&cshid=\"+google.cshid);a=d||\"/\"+(g||\"gen_204\")+\"?atyp=i&ct=\"+a+\"&cad=\"+b+e+f+\"&zx=\"+google.time()+c;/^http:/i.test(a)&&google.https()&&(google.ml(Error(\"a\"),!1,{src:a,glmm:1}),a=\"\");return '\n",
      " 'a};}).call(this);(function(){google.y={};google.x=function(a,b){if(a)var '\n",
      " 'c=a.id;else{do '\n",
      " 'c=Math.random();while(google.y[c])}google.y[c]=[a,b];return!1};google.lm=[];google.plm=function(a){google.lm.push.apply(google.lm,a)};google.lq=[];google.load=function(a,b,c){google.lq.push([[a],b,c])};google.loadAll=function(a,b){google.lq.push([a,b])};}).call(this);google.f={};</script><script '\n",
      " 'nonce=\"QA9aPd6GP21fPJsbP9VV6w==\">var '\n",
      " 'a=window.location,b=a.href.indexOf(\"#\");if(0<=b){var '\n",
      " 'c=a.href.substring(b+1);/(^|&)q=/.test(c)&&-1==c.indexOf(\"#\")&&a.replace(\"/search?\"+c.replace(/(^|&)fp=[^&]*/g,\"\")+\"&cad=h\")};</script><style>#gbar,#guser{font-size:13px;padding-top:1px '\n",
      " '!important;}#gbar{height:22px}#guser{padding-bottom:7px '\n",
      " '!important;text-align:right}.gbh,.gbd{border-top:1px solid '\n",
      " '#c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}@media '\n",
      " 'all{.gb1{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb4{text-decoration:underline '\n",
      " '!important}a.gb1,a.gb4{color:#00c !important}.gbi .gb4{color:#dd8e27 '\n",
      " '!important}.gbf .gb4{color:#900 !important}\\n'\n",
      " '</style><style>body,td,a,p,.h{font-family:arial,sans-serif}body{margin:0;overflow-y:scroll}#gog{padding:3px '\n",
      " '8px 0}td{line-height:.8em}.gac_m '\n",
      " 'td{line-height:17px}form{margin-bottom:20px}.h{color:#36c}.q{color:#00c}.ts '\n",
      " 'td{padding:0}.ts{border-collapse:collapse}em{font-weight:bold;font-style:normal}.lst{height:25px;width:496px}.gsfi,.lst{font:18px '\n",
      " 'arial,sans-serif}.gsfs{font:17px '\n",
      " 'arial,sans-serif}.ds{display:inline-box;display:inline-block;margin:3px 0 '\n",
      " '4px;margin-left:4px}input{font-family:inherit}a.gb1,a.gb2,a.gb3,a.gb4{color:#11c '\n",
      " '!important}body{background:#fff;color:black}a{color:#11c;text-decoration:none}a:hover,a:active{text-decoration:underline}.fl '\n",
      " 'a{color:#36c}a:visited{color:#551a8b}a.gb1,a.gb4{text-decoration:underline}a.gb3:hover{text-decoration:none}#ghead '\n",
      " 'a.gb2:hover{color:#fff !important}.sblc{padding-top:5px}.sblc '\n",
      " 'a{display:block;margin:2px '\n",
      " '0;margin-left:13px;font-size:11px}.lsbb{background:#eee;border:solid '\n",
      " '1px;border-color:#ccc #999 #999 '\n",
      " '#ccc;height:30px}.lsbb{display:block}.ftl,#fll '\n",
      " 'a{display:inline-block;margin:0 '\n",
      " '12px}.lsb{background:url(/images/nav_logo229.png) 0 -261px '\n",
      " 'repeat-x;border:none;color:#000;cursor:pointer;height:30px;margin:0;outline:0;font:15px '\n",
      " 'arial,sans-serif;vertical-align:top}.lsb:active{background:#ccc}.lst:focus{outline:none}</style><script '\n",
      " 'nonce=\"QA9aPd6GP21fPJsbP9VV6w==\"></script><link '\n",
      " 'href=\"/images/branding/product/ico/googleg_lodp.ico\" rel=\"shortcut '\n",
      " 'icon\"></head><body bgcolor=\"#fff\"><script '\n",
      " 'nonce=\"QA9aPd6GP21fPJsbP9VV6w==\">(function(){var '\n",
      " \"src='/images/nav_logo229.png';var iesg=false;document.body.onload = \"\n",
      " 'function(){window.n && window.n();if (document.images){new '\n",
      " 'Image().src=src;}\\n'\n",
      " 'if '\n",
      " '(!iesg){document.f&&document.f.q.focus();document.gbqf&&document.gbqf.q.focus();}\\n'\n",
      " '}\\n'\n",
      " '})();</script><div id=\"mngb\"> <div id=gbar><nobr><b class=gb1>Search</b> <a '\n",
      " 'class=gb1 href=\"http://www.google.com/imghp?hl=en&tab=wi\">Images</a> <a '\n",
      " 'class=gb1 href=\"http://maps.google.com/maps?hl=en&tab=wl\">Maps</a> <a '\n",
      " 'class=gb1 href=\"https://play.google.com/?hl=en&tab=w8\">Play</a> <a class=gb1 '\n",
      " 'href=\"http://www.youtube.com/?gl=US&tab=w1\">YouTube</a> <a class=gb1 '\n",
      " 'href=\"http://news.google.com/nwshp?hl=en&tab=wn\">News</a> <a class=gb1 '\n",
      " 'href=\"https://mail.google.com/mail/?tab=wm\">Gmail</a> <a class=gb1 '\n",
      " 'href=\"https://drive.google.com/?tab=wo\">Drive</a> <a class=gb1 '\n",
      " 'style=\"text-decoration:none\" '\n",
      " 'href=\"https://www.google.com/intl/en/options/\"><u>More</u> '\n",
      " '&raquo;</a></nobr></div><div id=guser width=100%><nobr><span id=gbn '\n",
      " 'class=gbi></span><span id=gbf class=gbf></span><span id=gbe></span><a '\n",
      " 'href=\"http://www.google.com/history/optout?hl=en\" class=gb4>Web History</a> '\n",
      " '| <a  href=\"/preferences?hl=en\" class=gb4>Settings</a> | <a target=_top '\n",
      " 'id=gb_70 '\n",
      " 'href=\"https://accounts.google.com/ServiceLogin?hl=en&passive=true&continue=http://www.google.com/\" '\n",
      " 'class=gb4>Sign in</a></nobr></div><div class=gbh style=left:0></div><div '\n",
      " 'class=gbh style=right:0></div> </div><center><br clear=\"all\" id=\"lgpd\"><div '\n",
      " 'id=\"lga\"><img alt=\"Google\" height=\"92\" '\n",
      " 'src=\"/images/branding/googlelogo/1x/googlelogo_white_background_color_272x92dp.png\" '\n",
      " 'style=\"padding:28px 0 14px\" width=\"272\" id=\"hplogo\" '\n",
      " 'onload=\"window.lol&&lol()\"><br><br></div><form action=\"/search\" '\n",
      " 'name=\"f\"><table cellpadding=\"0\" cellspacing=\"0\"><tr valign=\"top\"><td '\n",
      " 'width=\"25%\">&nbsp;</td><td align=\"center\" nowrap=\"\"><input name=\"ie\" '\n",
      " 'value=\"ISO-8859-1\" type=\"hidden\"><input value=\"en\" name=\"hl\" '\n",
      " 'type=\"hidden\"><input name=\"source\" type=\"hidden\" value=\"hp\"><input '\n",
      " 'name=\"biw\" type=\"hidden\"><input name=\"bih\" type=\"hidden\"><div class=\"ds\" '\n",
      " 'style=\"height:32px;margin:4px 0\"><input '\n",
      " 'style=\"color:#000;margin:0;padding:5px 8px 0 6px;vertical-align:top\" '\n",
      " 'autocomplete=\"off\" class=\"lst\" value=\"\" title=\"Google Search\" '\n",
      " 'maxlength=\"2048\" name=\"q\" size=\"57\"></div><br style=\"line-height:0\"><span '\n",
      " 'class=\"ds\"><span class=\"lsbb\"><input class=\"lsb\" value=\"Google Search\" '\n",
      " 'name=\"btnG\" type=\"submit\"></span></span><span class=\"ds\"><span '\n",
      " 'class=\"lsbb\"><input class=\"lsb\" value=\"I\\'m Feeling Lucky\" name=\"btnI\" '\n",
      " 'onclick=\"if(this.form.q.value)this.checked=1; else '\n",
      " 'top.location=\\'/doodles/\\'\" type=\"submit\"></span></span></td><td class=\"fl '\n",
      " 'sblc\" align=\"left\" nowrap=\"\" width=\"25%\"><a '\n",
      " 'href=\"/advanced_search?hl=en&amp;authuser=0\">Advanced search</a><a '\n",
      " 'href=\"/language_tools?hl=en&amp;authuser=0\">Language '\n",
      " 'tools</a></td></tr></table><input id=\"gbv\" name=\"gbv\" type=\"hidden\" '\n",
      " 'value=\"1\"></form><div id=\"gac_scont\"></div><div '\n",
      " 'style=\"font-size:83%;min-height:3.5em\"><br><div '\n",
      " 'id=\"prm\"><style>.szppmdbYutt__middle-slot-promo{font-size:small;margin-bottom:32px}.szppmdbYutt__middle-slot-promo '\n",
      " 'a.ZIeIlb{display:inline-block;text-decoration:none}.szppmdbYutt__middle-slot-promo '\n",
      " 'img{border:none;margin-right:5px;vertical-align:middle}</style><div '\n",
      " 'class=\"szppmdbYutt__middle-slot-promo\" '\n",
      " 'data-ved=\"0ahUKEwj8hKqWsbncAhXH7oMKHQfRBzIQnIcBCAQ\"><a class=\"NKcBbd\" '\n",
      " 'href=\"https://www.google.com/url?q=https://www.youtube.com/attribution_link%3Fc%3Dytmphpp-ytmp-acq-int-gh-txt-ytmphpp0718%26u%3Dhttps%253A%252F%252Fmusic.youtube.com%252F&amp;source=hpp&amp;id=19007460&amp;ct=3&amp;usg=AFQjCNFY9xfGiLYNG6CzONu1_juXLfxmzA&amp;sa=X&amp;ved=0ahUKEwj8hKqWsbncAhXH7oMKHQfRBzIQ8IcBCAU\" '\n",
      " 'rel=\"nofollow\">The new YouTube Music is here</a></div></div></div><span '\n",
      " 'id=\"footer\"><div style=\"font-size:10pt\"><div style=\"margin:19px '\n",
      " 'auto;text-align:center\" id=\"fll\"><a href=\"/intl/en/ads/\">Advertising\\xa0'\n",
      " 'Programs</a><a href=\"/services/\">Business Solutions</a><a '\n",
      " 'href=\"https://plus.google.com/116899029375914044550\" '\n",
      " 'rel=\"publisher\">+Google</a><a href=\"/intl/en/about.html\">About '\n",
      " 'Google</a></div></div><p style=\"color:#767676;font-size:8pt\">&copy; 2018 - '\n",
      " '<a href=\"/intl/en/policies/privacy/\">Privacy</a> - <a '\n",
      " 'href=\"/intl/en/policies/terms/\">Terms</a></p></span></center><script '\n",
      " 'nonce=\"QA9aPd6GP21fPJsbP9VV6w==\">(function(){window.google.cdo={height:0,width:0};(function(){var '\n",
      " 'a=window.innerWidth,b=window.innerHeight;if(!a||!b){var '\n",
      " 'c=window.document,d=\"CSS1Compat\"==c.compatMode?c.documentElement:c.body;a=d.clientWidth;b=d.clientHeight}a&&b&&(a!=google.cdo.width||b!=google.cdo.height)&&google.log(\"\",\"\",\"/client_204?&atyp=i&biw=\"+a+\"&bih=\"+b+\"&ei=\"+google.kEI);}).call(this);})();</script><div '\n",
      " 'id=\"xjsd\"></div><div id=\"xjsi\"><script '\n",
      " 'nonce=\"QA9aPd6GP21fPJsbP9VV6w==\">(function(){function '\n",
      " 'c(b){window.setTimeout(function(){var '\n",
      " 'a=document.createElement(\"script\");a.src=b;google.timers&&google.timers.load.t&&google.tick&&google.tick(\"load\",{gen204:\"xjsls\",clearcut:31});document.getElementById(\"xjsd\").appendChild(a)},0)}google.dljp=function(b,a){google.xjsu=b;c(a)};google.dlj=c;}).call(this);if(!google.xjs){window._=window._||{};window._DumpException=window._._DumpException=function(e){throw '\n",
      " 'e};window._F_installCss=window._._F_installCss=function(c){};google.dljp(\\'/xjs/_/js/k\\\\x3dxjs.hp.en_US.U_7nFu05n1k.O/m\\\\x3dsb_he,d/am\\\\x3dhIKxAQ/rt\\\\x3dj/d\\\\x3d1/rs\\\\x3dACT90oEqhoXu40MUvthKMTupIRo5p47y8Q\\',\\'/xjs/_/js/k\\\\x3dxjs.hp.en_US.U_7nFu05n1k.O/m\\\\x3dsb_he,d/am\\\\x3dhIKxAQ/rt\\\\x3dj/d\\\\x3d1/rs\\\\x3dACT90oEqhoXu40MUvthKMTupIRo5p47y8Q\\');google.xjs=1;}google.pmc={\"sb_he\":{\"agen\":true,\"cgen\":true,\"client\":\"heirloom-hp\",\"dh\":true,\"dhqt\":true,\"ds\":\"\",\"ffql\":\"en\",\"fl\":true,\"host\":\"google.com\",\"isbh\":28,\"jsonp\":true,\"msgs\":{\"cibl\":\"Clear '\n",
      " 'Search\",\"dym\":\"Did you mean:\",\"lcky\":\"I\\\\u0026#39;m Feeling '\n",
      " 'Lucky\",\"lml\":\"Learn more\",\"oskt\":\"Input tools\",\"psrc\":\"This search was '\n",
      " 'removed from your \\\\u003Ca href=\\\\\"/history\\\\\"\\\\u003EWeb '\n",
      " 'History\\\\u003C/a\\\\u003E\",\"psrl\":\"Remove\",\"sbit\":\"Search by '\n",
      " 'image\",\"srch\":\"Google '\n",
      " 'Search\"},\"nds\":true,\"ovr\":{},\"pq\":\"\",\"refpd\":true,\"rfs\":[],\"sbpl\":24,\"sbpr\":24,\"scd\":10,\"sce\":5,\"stok\":\"U8AkLg_bqhI_BVJklu2rqCQxCkk\"},\"d\":{},\"ZI/YVQ\":{},\"Qnk92g\":{},\"U5B21g\":{},\"DPBNMg\":{},\"YFCs/g\":{}};google.x(null,function(){});(function(){var '\n",
      " 'r=[];google.plm(r);})();(function(){var m=[]\\n'\n",
      " ';google.jsc && google.jsc.m(m);})();</script></div></body></html>')\n"
     ]
    }
   ],
   "source": [
    "pp.pprint(g.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 带参数的请求。\n",
    "\n",
    "- 使用get方法，参数放在url?param=values的形式。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'https://www.so.com/s?q=%E6%9D%A8%E5%BD%A6%E6%98%9F'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pqyload = {'q':'杨彦星'}\n",
    "r = requests.get('http://www.so.com/s', params=pqyload)\n",
    "r.url "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 使用post方法，参数在url中不可见，放在请求的head里。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('{\"args\":{},\"data\":\"\",\"files\":{},\"form\":{\"a\":\"\\\\u6768\",\"b\":\"hello\"},\"headers\":{\"Accept\":\"*/*\",\"Accept-Encoding\":\"gzip, '\n",
      " 'deflate\",\"Connection\":\"close\",\"Content-Length\":\"19\",\"Content-Type\":\"application/x-www-form-urlencoded\",\"Host\":\"httpbin.org\",\"User-Agent\":\"python-requests/2.19.1\"},\"json\":null,\"origin\":\"35.193.18.41\",\"url\":\"http://httpbin.org/post\"}\\n')\n"
     ]
    }
   ],
   "source": [
    "payload =  {'a':'杨','b':'hello'}\n",
    "r = requests.post(\"http://httpbin.org/post\", data=payload)\n",
    "pp.pprint(r.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- data不光可以接受字典类型的数据，还可以接受json等格式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "payload = {     'a'     :     '杨'     ,     'b'     :     'hello'     }\n",
    "\n",
    "import  json\n",
    "r     =     requests.post(     'http://httpbin.org/post'     , data     =     json.dumps(payload)) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"args\":{},\"data\":\"{\\\\\"a\\\\\": \\\\\"\\\\\\\\u6768\\\\\", \\\\\"b\\\\\": \\\\\"hello\\\\\"}\",\"files\":{},\"form\":{},\"headers\":{\"Accept\":\"*/*\",\"Accept-Encoding\":\"gzip, deflate\",\"Connection\":\"close\",\"Content-Length\":\"29\",\"Host\":\"httpbin.org\",\"User-Agent\":\"python-requests/2.19.1\"},\"json\":{\"a\":\"\\\\u6768\",\"b\":\"hello\"},\"origin\":\"35.193.18.41\",\"url\":\"http://httpbin.org/post\"}\\n'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r.text"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 发送文件的post类型，这个相当于向网站上传一张图片，文档等操作，这时要使用files参数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = 'http://httpbin.org/post'\n",
    "files = {'file':open ('touxiang.png','rb')}\n",
    "r = requests.post(url, files     =     files) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **定制headers，使用headers参数来传递.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "url = 'https://api.github.com/some/endpoint'\n",
    "payload = {'some':'data'}\n",
    "headers = {'content-type':'application/json'}\n",
    " \n",
    "r = requests.post(url, data = json.dumps(payload), headers = headers) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"message\":\"Not Found\",\"documentation_url\":\"https://developer.github.com/v3\"}'"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r.text"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 高级操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Response [200]>"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 设置超时。\n",
    "requests.get('http://github.com', timeout=1)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"cookies\":{\"cookies_are\":\"working\"}}\\n'"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#自已定义请求的COOKIES.\n",
    "\n",
    "url     = 'http://httpbin.org/cookies'\n",
    "cookies = {'cookies_are':'working'}\n",
    "r       = requests.get(url,cookies = cookies) \n",
    "r.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
